library(ManyEcoEvo)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
Create orchard-style plots for both BT and Euc, without removing analyses with extreme weights:
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_orchard_plot")) %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name),
.f = ~ plot_model_means_orchard(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3))
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## [[1]]
##
## [[2]]
##
## [[3]]
From the above it’s clear that the general pattern is for the higher weighted cases to pull the model means closer to them. In the case of the eucalyptus data, it’s several EXTREME weights that are pulling the model means closer to them. Let’s look at the distributions of weights and their values for the euc dataset Weight Distribution Blue Tit Model:
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "blue tit") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>% pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
ggplot(aes(x = weights)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Box-Cox Weights",
x = "Box-Cox Weights",
y = "Frequency") +
theme_minimal() +
theme(legend.position = "none")
Weight Distribution Eucalyptus Model
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>%
pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
ggplot(aes(x = weights)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Box-Cox Weights",
x = "Box-Cox Weights",
y = "Frequency") +
theme_minimal() +
theme(legend.position = "none")
Weight Distribution for Eucalyptus model when case with maximum weight removed
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>%
pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
filter(weights != max(weights)) %>%
ggplot(aes(x = weights)) +
geom_histogram(bins = 30) +
labs(title = "Distribution of Box-Cox Weights",
x = "Box-Cox Weights",
y = "Frequency") +
theme_minimal() +
theme(legend.position = "none")
Let’s see all values of weight
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda, data) %>%
pull(box_cox_rating_cat) %>%
pluck(1) %>%
model.frame() %>%
as_tibble() %>%
rename(weights = `(weights)`) %>%
mutate(weights = as.numeric(weights)) %>%
pluck("weights", unique, sort) # all weights
## [1] 563686395 563717442 563749404 563792386 565399621
## [6] 565829955 565935996 566268848 567997367 569333108
## [11] 574528639 575196234 575387867 575722251 577600776
## [16] 579285551 579948308 580899020 583278546 584152556
## [21] 585498693 588256091 589433325 591917249 592900308
## [26] 607544276 617833974 622796581 626556210 633218858
## [31] 634782083 639555963 645135519 675024708 679607659
## [36] 679917410 685839966 694551570 731532604 810363823
## [41] 841303906 848538448 872489778 883906458 889910295
## [46] 890972369 919288483 933683584 962902173 1041096068
## [51] 1105126672 1155109828 1175856985 1373998220 1553121885
## [56] 1652915383 1739127212 1859021802 1921555550 1932590106
## [61] 2160580342 2185971274 2284404564 3627724910 4623841569
## [66] 5082015565 5099533477 5127083966 5313360448 5637576773
## [71] 5749335304 5767498603 5986636683 6027266130 7006392797
## [76] 9979626599 11545920175 708161001050
What are the cases with extreme weights? Below we identify the cases with the top 2 maximum weights
extreme_weight_observation <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
pluck("effects_analysis", 1) %>%
select(study_id, box_cox_abs_deviation_score_estimate, box_cox_var) %>%
mutate(weights = 1/box_cox_var) %>%
# filter(weights == max(weights))
filter(weights > 10000000000)
extreme_weight_observation
## # A tibble: 2 × 4
## study_id box_cox_abs_deviation_score_estimate box_cox_var weights
## <chr> <dbl> <dbl> <dbl>
## 1 Brooklyn-2-2-1 1.48 1.41e-12 708161001050.
## 2 Cassilis-1-1-1 -1.11 8.66e-11 11545920175.
Remove cases with extreme weights and refit / plot models
refitted_eucalyptus_model <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
mutate(effects_analysis = map(effects_analysis, ~filter(.x, study_id %nin% extreme_weight_observation$study_id))) %>%
mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat(
.data = .x,
outcome = box_cox_abs_deviation_score_estimate,
outcome_var = box_cox_var,
interceptless = FALSE
)))
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## boundary (singular) fit: see help('isSingular')
Note, not shown here yet. But when only the top most case is removed, lme4 gives singular fit warning, when I run check_singularity() on the model, it seems OK. When the top two cases are removed, singular fit warning is given by lme4, and check_singularity() returns FALSE, meaning the model is singular. Also, when I check convergence on the model with top 2 cases removed, gradient shifts from ~3.79e^-5 with only case with max weight removed to 0. Seems strange to me that this would occur when both extreme weights are removed… Anyways, I progress with refitting and plotting after removing cases with weights 10000000000 and above (here, there are two). Recreate the plot data for regenerating plots after refitting models
refitted_plot_data<-
refitted_eucalyptus_model %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.
Create violin plots for refitted models
refitted_plot_data %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
Regenerate plot but back-transform to original scale (absolute deviation from meta-analytic mean)
refitted_plot_data %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = TRUE))
## [[1]]
Create orchard-style plots for refitted models
refitted_plot_data %>%
mutate(plot_name = str_replace(plot_name, "_violin_plot", "_orchard_plot")) %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name),
.f = ~ plot_model_means_orchard(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3))
## [[1]]
What is causing these extreme values observed in
extreme_weight_observation? Is the weight calculation
ok?
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All",
dataset == "eucalyptus") %>%
select(effects_analysis) %>%
unnest(effects_analysis) %>%
filter(study_id %in% extreme_weight_observation$study_id) %>%
select(study_id,
Zr,
VZr,
abs_deviation_score_estimate,
box_cox_abs_deviation_score_estimate,
folded_mu_val,
folded_v_val,
box_cox_var,
lambda) %>%
mutate(weights = 1/box_cox_var)
## # A tibble: 2 × 10
## study_id Zr VZr abs_deviation_score_est…¹ box_cox_abs_deviatio…²
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Brooklyn-2-2-1 -4.47 0.00870 4.38 1.48
## 2 Cassilis-1-1-1 0.236 0.00300 0.328 -1.11
## # ℹ abbreviated names: ¹abs_deviation_score_estimate,
## # ²box_cox_abs_deviation_score_estimate
## # ℹ 5 more variables: folded_mu_val <dbl>, folded_v_val <dbl>,
## # box_cox_var <dbl>, lambda <dbl>, weights <dbl>
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
select(effects_analysis) %>%
unnest(effects_analysis) %>%
unnest(review_data) %>%
mutate(weights = 1/box_cox_var) %>%
write_csv("extreme_weight_analysis_info.csv")
The weights are calculated as the inverse of the
variance of the box-cox transformed absolute deviation scores. The
variance of the box-cox transformed absolute deviation scores
box_cox_var is calculated as the variance of the folded
absolute deviation scores and the folded variance of the absolute
deviation scores The folded absolute deviation scores
folded_mu and the The folded variance of the absolute
deviation scores folded_v are calculated as follows:
box_cox_transform
## function(data, dataset) {
## if(rlang::is_na(data) | rlang::is_null(data)){
## cli::cli_alert_warning(text = glue::glue("Cannot box-cox transform data for",
## paste(names(dplyr::cur_group()),
## dplyr::cur_group(),
## sep = " = ",
## collapse = ", ")))
## result <- NA
## } else{
## cli::cli_h2(glue::glue("Box-cox transforming absolute deviation scores for ", {dataset}))
##
## box_cox_recipe <- recipes::recipe(~.,
## data = select(data,
## starts_with("abs_deviation_score_"))) %>%
## timetk::step_box_cox(everything(),limits = c(0,10)) %>%
## recipes::prep(training = data, retain = TRUE) #estimate lambda + box cox transform vars
##
## if(box_cox_recipe %>%
## recipes::tidy(number = 1) %>% nrow() > 0){
## lambda <- box_cox_recipe %>%
## recipes::tidy(number = 1) %>%
## pull(., lambda) %>%
## `names<-`(., pull(box_cox_recipe %>%
## recipes::tidy(number = 1), terms))
##
## if(!is.null(dataset)){
## cli::cli_alert_info(c(
## "Optimised Lambda used in Box-Cox Transformation of ",
## "{dataset} dataset variables ",
## "is {round(lambda, 4)} for `{names(lambda)}`."
## ))
## }
##
## variance_box_cox <- function(folded_mu, folded_v, lambda){
## variance_bc <- folded_v*(lambda*folded_mu^(lambda-1))^2 # delta method
## return(variance_bc)
## }
##
## # folded abs_dev_score
## folded_mu <- function(abs_dev_score, VZr) {
## mu <- abs_dev_score
## sigma <- sqrt(VZr)
## fold_mu <- sigma * sqrt(2/pi) * exp((-mu^2)/(2 * sigma^2)) + mu * (1 - 2 * pnorm(-mu/sigma))
## fold_mu
## }
##
## # folded VZr
## folded_v <- function(abs_dev_score, VZr) {
## mu <- abs_dev_score
## sigma <- sqrt(VZr)
## fold_mu <- sigma * sqrt(2/pi) * exp((-mu^2)/(2 * sigma^2)) + mu * (1 - 2 * pnorm(-mu/sigma))
## fold_se <- sqrt(mu^2 + sigma^2 - fold_mu^2)
## # adding se to make bigger abs_dev_score
## fold_v <- fold_se^2
## fold_v
## }
##
## Z_colname <- data %>% colnames %>% keep(., str_starts(., "Z"))
## VZ_colname <- data %>% colnames %>% keep(., str_starts(., "VZ"))
## result <- recipes::juice(box_cox_recipe) %>%
## rename_with(.fn = ~ paste0("box_cox_", .x)) %>%
## bind_cols(data, .) %>%
## mutate(folded_mu_val = folded_mu(abs_deviation_score_estimate, !!as.name(VZ_colname)),
## folded_v_val = folded_v(abs_deviation_score_estimate, !!as.name(VZ_colname)),
## box_cox_var = variance_box_cox(folded_mu_val, folded_v_val, lambda[[1]]),
## lambda = lambda[[1]])
##
## } else{
## result <- NA
## cli::cli_alert_warning(text = glue::glue("Lambda cannot be computed."))
## }
##
##
## }
##
## return(result)
##
## }
## <bytecode: 0x11c6f58d0>
## <environment: namespace:ManyEcoEvo>
refitted_model_folded_v <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat(
.data = .x,
outcome = box_cox_abs_deviation_score_estimate,
outcome_var = folded_v_val,
interceptless = FALSE
)))
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
refitted_plot_data_folded_v <-
refitted_model_folded_v %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
refitted_plot_data_folded_v %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
##
## [[2]]
##
## [[3]]
fit_boxcox_ratings_cat to use
folded_v instead of box_cox_var to calculate
weightsfit_boxcox_ratings_cat_folded_v <-
function(.data, outcome, outcome_var, interceptless = FALSE) {
cli::cli_h2(glue::glue("Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes"))
# Example Usage:
# library(tidyverse);library(targets);library(metafor)
# tar_load(meta_analysis_outputs)
# meta_analysis_outputs$data[[1]] %>%
# fit_boxcox_ratings_cat(.,
# outcome = box_cox_abs_deviation_score_estimate,
# outcome_var = VZr, interceptless = FALSE)
# TODO @egouldo stopifnot data doesn't contain variables named eval(box_cox_outcome_var), eval(sampling_variance_var), review_data
# TODO @egouldo unnest and then check stopifnot: RateAnalysis, ReviewerId, study_id.
data_tbl <-
.data %>%
unnest(cols = c(review_data)) %>%
select(study_id,
ReviewerId,
PublishableAsIs,
starts_with("box_cox_"), #@TODO - delete this row?
{{outcome}},
{{outcome_var}}) %>%
ungroup() %>%
mutate(PublishableAsIs = forcats::fct_relevel(PublishableAsIs,c("deeply flawed and unpublishable",
"publishable with major revision",
"publishable with minor revision",
"publishable as is")),
obs_id = 1:n())
if (interceptless == FALSE) {
f <- rlang::new_formula(rlang::ensym(outcome),
expr(PublishableAsIs +
(1 | ReviewerId) #+ (1 | study_id ) #RE omitted due to convergence issues
))
mod <- lme4::lmer(f,
data = data_tbl,
weights = I(1/pull(data_tbl,{{outcome_var}}))
)
} else (#interceptless: for plotting
mod <- lme4::lmer(rlang::new_formula(rlang::ensym(outcome),
expr(-1 + PublishableAsIs + (1 | ReviewerId))), #+ (1 | study_id) #problem with the groups
data = data_tbl #,
# weights = I(1/pull(data_tbl,{{outcome_var}}))
)
)
return(mod)
}
# apply
refitted_models_folded_v <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat_folded_v(
.data = .x,
outcome = box_cox_abs_deviation_score_estimate,
outcome_var = folded_v_val,
interceptless = FALSE
)))
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
refitted_plot_data_folded_v <-
refitted_models_folded_v %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
refitted_plot_data_folded_v %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
##
## [[2]]
##
## [[3]]
now without taking the inverse of the folded_v
fit_boxcox_ratings_cat_folded_v_no_inv <-
function(.data, outcome, outcome_var, interceptless = FALSE) {
cli::cli_h2(glue::glue("Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes"))
# Example Usage:
# library(tidyverse);library(targets);library(metafor)
# tar_load(meta_analysis_outputs)
# meta_analysis_outputs$data[[1]] %>%
# fit_boxcox_ratings_cat(.,
# outcome = box_cox_abs_deviation_score_estimate,
# outcome_var = VZr, interceptless = FALSE)
# TODO @egouldo stopifnot data doesn't contain variables named eval(box_cox_outcome_var), eval(sampling_variance_var), review_data
# TODO @egouldo unnest and then check stopifnot: RateAnalysis, ReviewerId, study_id.
data_tbl <-
.data %>%
unnest(cols = c(review_data)) %>%
select(study_id,
ReviewerId,
PublishableAsIs,
starts_with("box_cox_"), #@TODO - delete this row?
{{outcome}},
{{outcome_var}}) %>%
ungroup() %>%
mutate(PublishableAsIs = forcats::fct_relevel(PublishableAsIs,c("deeply flawed and unpublishable",
"publishable with major revision",
"publishable with minor revision",
"publishable as is")),
obs_id = 1:n())
if (interceptless == FALSE) {
f <- rlang::new_formula(rlang::ensym(outcome),
expr(PublishableAsIs +
(1 | ReviewerId) #+ (1 | study_id ) #RE omitted due to convergence issues
))
mod <- lme4::lmer(f,
data = data_tbl,
weights = I(pull(data_tbl,{{outcome_var}}))
)
} else (#interceptless: for plotting
mod <- lme4::lmer(rlang::new_formula(rlang::ensym(outcome),
expr(-1 + PublishableAsIs + (1 | ReviewerId))), #+ (1 | study_id) #problem with the groups
data = data_tbl #,
# weights = I(1/pull(data_tbl,{{outcome_var}}))
)
)
return(mod)
}
# apply
refitted_models_folded_v_no_inv <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat_folded_v_no_inv(
.data = .x,
outcome = box_cox_abs_deviation_score_estimate,
outcome_var = folded_v_val,
interceptless = FALSE
)))
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
refitted_plot_data_folded_v_no_inv <-
refitted_models_folded_v_no_inv %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `predictor_means = map(box_cox_rating_cat,
## modelbased::estimate_means)`.
## Caused by warning in `.qf.non0()`:
## ! Negative variance estimate obtained!
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
refitted_plot_data_folded_v_no_inv %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
##
## [[2]]
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_segment()`).
##
## [[3]]
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_segment()`).
# no weights
fit_boxcox_ratings_cat to use no weights instead
of box_cox_var or folded_v to calculate
weightsfit_boxcox_ratings_cat_no_weights <-
function(.data, outcome, outcome_var, interceptless = FALSE) {
cli::cli_h2(glue::glue("Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes"))
# Example Usage:
# library(tidyverse);library(targets);library(metafor)
# tar_load(meta_analysis_outputs)
# meta_analysis_outputs$data[[1]] %>%
# fit_boxcox_ratings_cat(.,
# outcome = box_cox_abs_deviation_score_estimate,
# outcome_var = VZr, interceptless = FALSE)
# TODO @egouldo stopifnot data doesn't contain variables named eval(box_cox_outcome_var), eval(sampling_variance_var), review_data
# TODO @egouldo unnest and then check stopifnot: RateAnalysis, ReviewerId, study_id.
data_tbl <-
.data %>%
unnest(cols = c(review_data)) %>%
select(study_id,
ReviewerId,
PublishableAsIs,
starts_with("box_cox_"), #@TODO - delete this row?
{{outcome}},
{{outcome_var}}) %>%
ungroup() %>%
mutate(PublishableAsIs = forcats::fct_relevel(PublishableAsIs,c("deeply flawed and unpublishable",
"publishable with major revision",
"publishable with minor revision",
"publishable as is")),
obs_id = 1:n())
if (interceptless == FALSE) {
f <- rlang::new_formula(rlang::ensym(outcome),
expr(PublishableAsIs +
(1 | ReviewerId) #+ (1 | study_id ) #RE omitted due to convergence issues
))
mod <- lme4::lmer(f,
data = data_tbl
)
} else (#interceptless: for plotting
mod <- lme4::lmer(rlang::new_formula(rlang::ensym(outcome),
expr(-1 + PublishableAsIs + (1 | ReviewerId))), #+ (1 | study_id) #problem with the groups
data = data_tbl #,
# weights = I(1/pull(data_tbl,{{outcome_var}}))
)
)
return(mod)
}
# apply
refitted_models_no_weights <-
ManyEcoEvo_results %>%
ungroup %>%
filter(exclusion_set == "complete",
publishable_subset == "All",
expertise_subset == "All") %>%
mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat_no_weights(
.data = .x,
outcome = box_cox_abs_deviation_score_estimate,
outcome_var = folded_v_val,
interceptless = FALSE
)))
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
##
refitted_plot_data_no_weights <-
refitted_models_no_weights %>%
hoist(.col = effects_analysis,
"lambda",
.simplify = TRUE,
.transform = unique) %>%
select(exclusion_set,
dataset,
estimate_type,
box_cox_rating_cat,
lambda) %>%
mutate(predictor_means =
map(box_cox_rating_cat, modelbased::estimate_means),
model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>%
drop_na() %>%
as_tibble()),
plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
mutate(model_data = map(model_data,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords())),
predictor_means = map(predictor_means,
.f = ~ mutate(.x, PublishableAsIs =
str_replace(PublishableAsIs,
"publishable with ", "") %>%
str_replace("deeply flawed and ", "") %>%
capwords()))) %>%
mutate(plot_name =
str_remove(plot_name, "complete, ") %>%
str_replace_all(., " ", "_") %>%
paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
refitted_plot_data_no_weights %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
##
## [[2]]
##
## [[3]]
comparison of different weight usage
# TODO: not sure if refitted_plot_data_folded_v and refitted_plot_data_no_weights are actually different, check code implementation!!
refitted_plot_data_no_weights %>% slice(1) %>%
bind_rows(refitted_plot_data_folded_v %>%
slice(1)) %>%
mutate(weights_type = c("no_weights", "folded_v")) %>%
pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
.f = ~ plot_model_means_box_cox_cat(..1,
PublishableAsIs,
..2,
new_order =
c("Unpublishable",
"Major Revision",
"Minor Revision",
"Publishable As Is"),
..3, ..4, back_transform = FALSE))
## [[1]]
##
## [[2]]
model fit stats??
refitted_plot_data_no_weights %>% slice(1) %>%
bind_rows(refitted_plot_data_folded_v %>%
slice(1)) %>%
mutate(weights_type = c("no_weights", "folded_v")) %>% pull(box_cox_rating_cat) %>% map(summary)
## [[1]]
## Linear mixed model fit by REML ['lmerMod']
## Formula: box_cox_abs_deviation_score_estimate ~ PublishableAsIs + (1 |
## ReviewerId)
## Data: data_tbl
##
## REML criterion at convergence: 726.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2924 -0.5437 0.1834 0.6490 2.8283
##
## Random effects:
## Groups Name Variance Std.Dev.
## ReviewerId (Intercept) 0.02269 0.1506
## Residual 0.24798 0.4980
## Number of obs: 473, groups: ReviewerId, 68
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.1085 0.1118 -9.910
## PublishableAsIspublishable with major revision -0.1915 0.1179 -1.625
## PublishableAsIspublishable with minor revision -0.1922 0.1163 -1.653
## PublishableAsIspublishable as is -0.1323 0.1296 -1.021
##
## Correlation of Fixed Effects:
## (Intr) PblshblAsIspblshblwthmjr
## PblshblAsIspblshblwthmjr -0.916
## PblshblAsIspblshblwthmnr -0.941 0.879
## PblshblAIai -0.846 0.793
## PblshblAsIspblshblwthmnr
## PblshblAsIspblshblwthmjr
## PblshblAsIspblshblwthmnr
## PblshblAIai 0.814
##
## [[2]]
## Linear mixed model fit by REML ['lmerMod']
## Formula: box_cox_abs_deviation_score_estimate ~ PublishableAsIs + (1 |
## ReviewerId)
## Data: data_tbl
## Weights: I(1/pull(data_tbl, { { outcome_var } }))
##
## REML criterion at convergence: 1027.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7143 -0.3039 0.2675 0.5748 4.3435
##
## Random effects:
## Groups Name Variance Std.Dev.
## ReviewerId (Intercept) 0.07302 0.2702
## Residual 173.88395 13.1865
## Number of obs: 473, groups: ReviewerId, 68
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.39527 0.14162 -9.852
## PublishableAsIspublishable with major revision -0.08558 0.14714 -0.582
## PublishableAsIspublishable with minor revision -0.03327 0.14603 -0.228
## PublishableAsIspublishable as is 0.21524 0.15861 1.357
##
## Correlation of Fixed Effects:
## (Intr) PblshblAsIspblshblwthmjr
## PblshblAsIspblshblwthmjr -0.900
## PblshblAsIspblshblwthmnr -0.932 0.880
## PblshblAIai -0.847 0.808
## PblshblAsIspblshblwthmnr
## PblshblAsIspblshblwthmjr
## PblshblAsIspblshblwthmnr
## PblshblAIai 0.838
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.4.0 (2024-04-24)
## os macOS Ventura 13.6.6
## system aarch64, darwin20
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Australia/Melbourne
## date 2024-06-14
## pandoc 3.1.12.2 @ /opt/homebrew/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date (UTC) lib source
## backports 1.5.0 2024-05-23 [1] CRAN (R 4.4.0)
## beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.4.0)
## bit 4.0.5 2022-11-15 [1] CRAN (R 4.4.0)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.4.0)
## blastula 0.3.5 2024-02-24 [1] CRAN (R 4.4.0)
## P boot 1.3-30 2024-02-26 [4] CRAN (R 4.4.0)
## broom 1.0.6 2024-05-17 [1] CRAN (R 4.4.0)
## bslib 0.7.0 2024-03-29 [1] CRAN (R 4.4.0)
## cachem 1.1.0 2024-05-16 [1] CRAN (R 4.4.0)
## cli 3.6.2 2023-12-11 [1] CRAN (R 4.4.0)
## coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.4.0)
## P codetools 0.2-20 2024-03-31 [4] CRAN (R 4.4.0)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.4.0)
## crayon 1.5.2 2022-09-29 [1] CRAN (R 4.4.0)
## datawizard 0.10.0 2024-03-26 [1] CRAN (R 4.4.0)
## devtools 2.4.5 2022-10-11 [1] CRAN (R 4.4.0)
## digest 0.6.35 2024-03-11 [1] CRAN (R 4.4.0)
## dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.0)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.4.0)
## emmeans 1.10.2 2024-05-20 [1] CRAN (R 4.4.0)
## EnvStats 2.8.1 2023-08-22 [1] CRAN (R 4.4.0)
## estimability 1.5.1 2024-05-12 [1] CRAN (R 4.4.0)
## evaluate 0.23 2023-11-01 [1] CRAN (R 4.4.0)
## fansi 1.0.6 2023-12-08 [1] CRAN (R 4.4.0)
## farver 2.1.2 2024-05-13 [1] CRAN (R 4.4.0)
## fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.0)
## fs 1.6.4 2024-04-25 [1] CRAN (R 4.4.0)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.4.0)
## ggbeeswarm 0.7.2 2023-04-29 [1] CRAN (R 4.4.0)
## ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.0)
## glue 1.7.0 2024-01-09 [1] CRAN (R 4.4.0)
## gtable 0.3.5 2024-04-22 [1] CRAN (R 4.4.0)
## hardhat 1.3.1 2024-02-02 [1] CRAN (R 4.4.0)
## highr 0.10 2022-12-22 [1] CRAN (R 4.4.0)
## hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.0)
## htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
## htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.4.0)
## httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.4.0)
## insight 0.19.11 2024-05-12 [1] CRAN (R 4.4.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.4.0)
## jsonlite 1.8.8 2023-12-04 [1] CRAN (R 4.4.0)
## knitr 1.46 2024-04-06 [1] CRAN (R 4.4.0)
## labeling 0.4.3 2023-08-29 [1] CRAN (R 4.4.0)
## later 1.3.2 2023-12-06 [1] CRAN (R 4.4.0)
## P lattice 0.22-6 2024-03-20 [4] CRAN (R 4.4.0)
## lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
## lme4 * 1.1-35.3 2024-04-16 [1] CRAN (R 4.4.0)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.0)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
## ManyEcoEvo * 1.2.0.9000 2024-06-13 [1] local
## P MASS 7.3-60.2 2024-04-24 [4] local
## mathjaxr 1.6-0 2022-02-28 [1] CRAN (R 4.4.0)
## P Matrix * 1.7-0 2024-03-22 [4] CRAN (R 4.4.0)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.4.0)
## metadat 1.2-0 2022-04-06 [1] CRAN (R 4.4.0)
## metafor 4.6-0 2024-03-28 [1] CRAN (R 4.4.0)
## mime 0.12 2021-09-28 [1] CRAN (R 4.4.0)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.4.0)
## minqa 1.2.7 2024-05-20 [1] CRAN (R 4.4.0)
## modelbased 0.8.7 2024-02-15 [1] CRAN (R 4.4.0)
## multcomp 1.4-25 2023-06-20 [4] CRAN (R 4.4.0)
## munsell 0.5.1 2024-04-01 [1] CRAN (R 4.4.0)
## mvtnorm 1.2-5 2024-05-21 [1] CRAN (R 4.4.0)
## P nlme 3.1-164 2023-11-27 [4] CRAN (R 4.4.0)
## nloptr 2.0.3 2022-05-26 [1] CRAN (R 4.4.0)
## numDeriv 2016.8-1.1 2019-06-06 [1] CRAN (R 4.4.0)
## parsnip 1.2.1 2024-03-22 [1] CRAN (R 4.4.0)
## pbkrtest 0.5.2 2023-01-19 [1] CRAN (R 4.4.0)
## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.4.0)
## pkgbuild 1.4.4 2024-03-17 [1] CRAN (R 4.4.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0)
## pkgload 1.3.4 2024-01-16 [1] CRAN (R 4.4.0)
## pointblank 0.12.1 2024-03-25 [1] CRAN (R 4.4.0)
## profvis 0.3.8 2023-05-02 [1] CRAN (R 4.4.0)
## promises 1.3.0 2024-04-05 [1] CRAN (R 4.4.0)
## purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.0)
## Rcpp 1.0.12 2024-01-09 [1] CRAN (R 4.4.0)
## readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.0)
## remotes 2.5.0 2024-03-17 [1] CRAN (R 4.4.0)
## rlang 1.1.3 2024-01-10 [1] CRAN (R 4.4.0)
## rmarkdown 2.27 2024-05-17 [1] CRAN (R 4.4.0)
## rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.4.0)
## sae 1.3 2020-03-01 [1] CRAN (R 4.4.0)
## sandwich 3.1-0 2023-12-11 [4] CRAN (R 4.4.0)
## sass 0.4.9 2024-03-15 [1] CRAN (R 4.4.0)
## scales 1.3.0 2023-11-28 [1] CRAN (R 4.4.0)
## see 0.8.4 2024-04-29 [1] CRAN (R 4.4.0)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
## shiny 1.8.1.1 2024-04-02 [1] CRAN (R 4.4.0)
## stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0)
## stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.0)
## P survival 3.5-8 2024-02-14 [4] CRAN (R 4.4.0)
## TH.data 1.1-2 2023-04-17 [4] CRAN (R 4.4.0)
## tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.0)
## tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.0)
## tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.0)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.0)
## timechange 0.3.0 2024-01-18 [1] CRAN (R 4.4.0)
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.4.0)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.4.0)
## usethis 2.2.3 2024-02-19 [1] CRAN (R 4.4.0)
## utf8 1.2.4 2023-10-22 [1] CRAN (R 4.4.0)
## vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
## vipor 0.4.7 2023-12-18 [1] CRAN (R 4.4.0)
## vroom 1.6.5 2023-12-05 [1] CRAN (R 4.4.0)
## withr 3.0.0 2024-01-16 [1] CRAN (R 4.4.0)
## xfun 0.44 2024-05-15 [1] CRAN (R 4.4.0)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.4.0)
## yaml 2.3.8 2023-12-11 [1] CRAN (R 4.4.0)
## zoo 1.8-12 2023-04-13 [1] CRAN (R 4.4.0)
##
## [1] /Users/elliotgould/Library/Caches/org.R-project.R/R/renv/library/ManyEcoEvo-0bdd0026/macos/R-4.4/aarch64-apple-darwin20
## [2] /Users/elliotgould/Library/Caches/org.R-project.R/R/renv/sandbox/macos/R-4.4/aarch64-apple-darwin20/f7156815
## [3] /Users/elliotgould/Library/Caches/org.R-project.R/R/renv/sandbox/R-4.4/aarch64-apple-darwin20/84ba8b13
## [4] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
##
## P ── Loaded and on-disk path mismatch.
##
## ──────────────────────────────────────────────────────────────────────────────